library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.1     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 3.6.2
## here() starts at /Users/ryanharrington/Dropbox/00 - PCS Course/Example/covid_exploration
# Run functions.R, resolving its path through here::here() so the script does
# not depend on the current working directory.
# NOTE(review): get_county(), tidy_covid() and graph_covid_over_time() are
# called later but are not defined in the visible code; presumably they are
# defined in functions.R (confirm).
source(here::here("functions.R"))

# Use ggplot2's minimal theme as the default for all plots created afterwards.
theme_set(theme_minimal())

Read in Data

Read in data

# URL of the US confirmed-cases time series (CSV) in the
# CSSEGISandData/COVID-19 GitHub repository
confirmed <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"

# URL of the US deaths time series (CSV) in the same repository
deaths <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"

# Download the confirmed-cases table and tag every row with its record type so
# the two tables can still be told apart once they are combined.
confirmed_df <- mutate(read_csv(confirmed), Type = "Confirmed")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Download the deaths table and tag every row with its record type.
deaths_df <- mutate(read_csv(deaths), Type = "Deaths")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

Create a population dataframe

# Keep only the location-identifying columns plus Population from the deaths
# table; this serves as a lookup table for the join that follows.
population_df <- select(
  deaths_df,
  UID, iso2, iso3, code3, FIPS,
  Admin2, Province_State, Country_Region,
  Lat, Long_, Combined_Key,
  Population
)

Add population to confirmed

# Attach Population to the confirmed-cases table by matching on every shared
# location column, then move Population directly after the location columns.
# All remaining columns follow in their existing order.
confirmed_df <- select(
  left_join(
    confirmed_df,
    population_df,
    by = c(
      "UID", "iso2", "iso3", "code3", "FIPS",
      "Admin2", "Province_State", "Country_Region",
      "Lat", "Long_", "Combined_Key"
    )
  ),
  UID, iso2, iso3, code3, FIPS,
  Admin2, Province_State, Country_Region,
  Lat, Long_, Combined_Key,
  Population,
  everything()
)

Union confirmed cases and deaths together

# Combine both tables into one (union() also drops fully duplicated rows), then
# put the location columns, Type and Population first; all remaining columns
# follow in their existing order.
covid <- select(
  union(confirmed_df, deaths_df),
  UID, iso2, iso3, code3, FIPS,
  Admin2, Province_State, Country_Region,
  Lat, Long_, Combined_Key,
  Type, Population,
  everything()
)

Check County Data

New Castle

# Rows whose county name (Admin2) is "New Castle", in any state.
filter(covid, Admin2 == "New Castle")

Kent

# Rows for the county named "Kent", restricted to Delaware.
filter(covid, Admin2 == "Kent", Province_State == "Delaware")

Sussex

# Rows for the county named "Sussex", restricted to Delaware.
filter(covid, Admin2 == "Sussex", Province_State == "Delaware")

Delaware

# Every row belonging to the state of Delaware.
filter(covid, Province_State == "Delaware")

Toy functions

Tell me today’s date

With explicit return

# Build a greeting containing the current date.
# Takes no arguments; returns a single character string. The date is rendered
# with the pattern "%B %d, %Y" (full month name, two-digit day, four-digit year).
# This version hands the result back with an explicit return().
tell_me_todays_date <- function() {

  today <- Sys.Date()

  return(
    paste0("Hello! Today's date is ", format(today, format = "%B %d, %Y"), "!")
  )

}

With implicit return

# Build a greeting containing the current date.
# Takes no arguments; returns a single character string. The value of the last
# expression is the function's result, so no return() call is needed.
tell_me_todays_date <- function() {

  date_text <- format(Sys.Date(), format = "%B %d, %Y")
  greeting_parts <- c("Hello! Today's date is ", date_text, "!")

  # Collapse the three pieces into one string.
  paste0(greeting_parts, collapse = "")

}
# Call the function (it takes no arguments); the returned greeting is
# auto-printed at top level.
tell_me_todays_date()
## [1] "Hello! Today's date is November 08, 2021!"
tell_me_todays_date()
## [1] "Hello! Today's date is November 08, 2021!"

Distance from the mean

# Fix the random seed so the sample below is reproducible from run to run.
set.seed(1231)

# Draw 25 integers from 1..100, with replacement.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
nums <- sample(1:100, 25, replace = TRUE)

# Print the sampled values.
nums
##  [1]  74  47  12  60  36  53  60  40  62  46  61  10  26  41  26  92  83  19  12
## [20]  84  38  11  74  43 100
# Signed distance of every sampled value from the sample mean.
# `TRUE` is spelled out because `T` is a reassignable variable, not a constant.
nums - mean(nums, na.rm = TRUE)
##  [1]  25.6  -1.4 -36.4  11.6 -12.4   4.6  11.6  -8.4  13.6  -2.4  12.6 -38.4
## [13] -22.4  -7.4 -22.4  43.6  34.6 -29.4 -36.4  35.6 -10.4 -37.4  25.6  -5.4
## [25]  51.6
# Compute each element's signed distance from the mean of the vector.
#
# Arguments:
#   values  Numeric vector. NA entries are ignored when the mean is computed
#           and remain NA in the result.
# Returns:
#   A numeric vector the same length as `values`, holding `values - mean`.
calc_mean_dist <- function(values) {

  # `TRUE` rather than `T`: `T` is a plain variable, so a stray `T <- FALSE`
  # elsewhere would silently switch NA removal off.
  values - mean(values, na.rm = TRUE)

}
# Same computation as the inline `nums - mean(nums, ...)` expression, now done
# through the function; the printed values match.
calc_mean_dist(values = nums)
##  [1]  25.6  -1.4 -36.4  11.6 -12.4   4.6  11.6  -8.4  13.6  -2.4  12.6 -38.4
## [13] -22.4  -7.4 -22.4  43.6  34.6 -29.4 -36.4  35.6 -10.4 -37.4  25.6  -5.4
## [25]  51.6
# The function works on any numeric vector, here the integers 1 through 10.
calc_mean_dist(1:10)
##  [1] -4.5 -3.5 -2.5 -1.5 -0.5  0.5  1.5  2.5  3.5  4.5
# Here an evenly spaced sequence starting at 1 in steps of 5, bounded above by 173.
calc_mean_dist(seq(from = 1, to = 173, by = 5))
##  [1] -85 -80 -75 -70 -65 -60 -55 -50 -45 -40 -35 -30 -25 -20 -15 -10  -5   0   5
## [20]  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85

Function for filtering counties

# NOTE(review): get_county() is not defined in the visible code; presumably it
# comes from functions.R (confirm its signature and defaults there).
# The first argument is the data, the second a county name or the literal
# "All counties".
get_county(covid, "New Castle")
get_county(covid, "Kent")
get_county(covid, "Sussex")
get_county(covid, "All counties")
# An optional third argument is supplied here; it looks like a state name
# (TODO confirm what is used when it is omitted).
get_county(covid, "All counties", "Maryland")

Continue with Data Cleaning

# For each selection, get_county() is applied first and its result is passed
# to tidy_covid(). Both helpers are defined outside this section.
tidy_covid(get_county(covid, "New Castle"))
tidy_covid(get_county(covid, "Kent", "Maryland"))
tidy_covid(get_county(covid, "Monmouth", "New Jersey"))
tidy_covid(get_county(covid, "All counties"))

Adjusting from cumulative to daily values

# Apply get_county() for "All counties", then pass the result to tidy_covid().
tidy_covid(get_county(covid, "All counties"))

Graphing our data

# Line chart of Confirmed against Date, one coloured line per Admin2 value.
# NOTE(review): the Date and Confirmed columns are presumably produced by
# tidy_covid(); confirm in functions.R.
ggplot(
  tidy_covid(get_county(covid, "All counties")),
  aes(x = Date, y = Confirmed, color = Admin2, group = Admin2)
) +
  geom_line() +
  # Show y-axis values with thousands separators.
  scale_y_continuous(labels = scales::comma) +
  theme(
    legend.position = "top",
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "italic")
  ) +
  # Blank out the legend title.
  labs(color = "")

# Same data preparation as the hand-built plot, but drawn through the
# graph_covid_over_time() helper, with Confirmed_DoD passed as a bare column name.
graph_covid_over_time(
  tidy_covid(get_county(covid, "All counties")),
  Confirmed_DoD
)